knitr::opts_knit$set(root.dir = "/home/francescoc/Desktop/scGraphVerse/data/",message=FALSE, warning=FALSE)

Load Libraries and data

library(RColorBrewer)
library(DT)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(Seurat)
## Loading required package: SeuratObject
## Loading required package: sp
## 
## Attaching package: 'SeuratObject'
## 
## The following object is masked from 'package:DT':
## 
##     JS
## 
## The following objects are masked from 'package:base':
## 
##     intersect, t
## 
## 
## Attaching package: 'Seurat'
## 
## The following object is masked from 'package:DT':
## 
##     JS
library(STRINGdb)
library(SingleCellExperiment)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## 
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## 
## Attaching package: 'MatrixGenerics'
## 
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## 
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## 
## The following object is masked from 'package:SeuratObject':
## 
##     intersect
## 
## The following objects are masked from 'package:lubridate':
## 
##     intersect, setdiff, union
## 
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## 
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## 
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, saveRDS, setdiff,
##     table, tapply, union, unique, unsplit, which.max, which.min
## 
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## 
## The following objects are masked from 'package:lubridate':
## 
##     second, second<-
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## 
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The following object is masked from 'package:utils':
## 
##     findMatches
## 
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## 
## The following object is masked from 'package:sp':
## 
##     %over%
## 
## The following object is masked from 'package:lubridate':
## 
##     %within%
## 
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## 
## The following object is masked from 'package:purrr':
## 
##     reduce
## 
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## 
## 
## Attaching package: 'Biobase'
## 
## The following object is masked from 'package:MatrixGenerics':
## 
##     rowMedians
## 
## The following objects are masked from 'package:matrixStats':
## 
##     anyMissing, rowMedians
## 
## 
## Attaching package: 'SummarizedExperiment'
## 
## The following object is masked from 'package:Seurat':
## 
##     Assays
## 
## The following object is masked from 'package:SeuratObject':
## 
##     Assays
library(scGraphVerse)
## Warning: replacing previous import 'igraph::components' by 'Seurat::components'
## when loading 'scGraphVerse'
library(BiocParallel)
library(GENIE3)

reticulate::use_python("/usr/bin/python3", required = TRUE)
arboreto <- reticulate::import("arboreto.algo")
pandas <- reticulate::import("pandas")
numpy <- reticulate::import("numpy")

time <- list()

ddir <- "/home/francescoc/Desktop/NodeVerse/analysis/GENIE3/data"
pdir <- "/home/francescoc/Desktop/NodeVerse/analysis/GENIE3/plots"

Adjacency and Count matrices

adjm <- as.matrix(read.table("./../analysis/adjm_top500.txt"))
colnames(adjm) <- rownames(adjm)

as.data.frame(adjm) %>%
  datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Ground Truth")
count_matrices <- readRDS("./../analysis/sim_n200p200.RDS")
dim(count_matrices[[1]])
## [1] 200 217
count_matrices <- lapply(count_matrices, t)

as.data.frame(count_matrices[[1]]) %>%
  slice_head(n=10) %>%
  datatable(extensions = 'Buttons',
            options = list(
              dom = 'Bfrtip',
              buttons = c('csv', 'excel'),
              scrollX = TRUE,
              pageLength = 10), 
            caption = "Simulated Count matrix")

GENIE3

Late integration

set.seed(1234)
time[["GENIE3_late_15Cores"]] <- system.time(
  genie3_late <- infer_networks(count_matrices, 
                                method="GENIE3",
                                nCores = 15)
)

genie3_late[[1]] %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 output")

Symmetrize and ROC

genie3_late_wadj <- generate_adjacency(genie3_late)
sgenie3_late_wadj <- symmetrize(genie3_late_wadj, weight_function = "mean")

as.data.frame(sgenie3_late_wadj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 symmetric weighted adjacency")
genie3_late_auc <- plotROC(sgenie3_late_wadj, adjm, plot_title = "ROC curve - GENIE3 Late Integration", is_binary = F)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgenie3_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                                     weighted_adjm_list = sgenie3_late_wadj, 
                                     n = 3,
                                     method = "GENIE3",
                                     nCores = 15)

as.data.frame(sgenie3_late_adj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 symmetric adjacency")
scores.genie3.late.all <- pscores(adjm, sgenie3_late_adj)
## Registered S3 methods overwritten by 'fmsb':
##   method    from
##   print.roc pROC
##   plot.roc  pROC

scores.genie3.late.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores late")
plotg(sgenie3_late_adj)

## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]

Consensus

consesusm <- create_consensus(sgenie3_late_adj, method="vote")
consesusu <- create_consensus(sgenie3_late_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sgenie3_late_adj, weighted_list = sgenie3_late_wadj, method = "INet", threshold = 0.05, ncores = 15)
## [1] 0.3706143
## [1] 0.1067947
## [1] 0.02754931
scores.genie3.late <- pscores(adjm, list(consesusm,consesusu,consesunet))

scores.genie3.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores vote-union-iNet")

Plot comparison

compare_consensus(consensus_matrix = consesusm, reference_matrix = adjm, false_plot = F)

compare_consensus(consensus_matrix = consesusu, reference_matrix = adjm, false_plot = F)

compare_consensus(consensus_matrix = consesunet, reference_matrix = adjm, false_plot = F)

Community detection

adj_comm <- community_path(adjm)
## Registered S3 method overwritten by 'DescTools':
##   method         from  
##   reorder.factor gplots
## 
## 
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following object is masked from 'package:GenomicRanges':
## 
##     union
## The following object is masked from 'package:IRanges':
## 
##     union
## The following object is masked from 'package:S4Vectors':
## 
##     union
## The following objects are masked from 'package:BiocGenerics':
## 
##     normalize, path, union
## The following object is masked from 'package:Seurat':
## 
##     components
## The following objects are masked from 'package:lubridate':
## 
##     %--%, union
## The following objects are masked from 'package:dplyr':
## 
##     as_data_frame, groups, union
## The following objects are masked from 'package:purrr':
## 
##     compose, simplify
## The following object is masked from 'package:tidyr':
## 
##     crossing
## The following object is masked from 'package:tibble':
## 
##     as_data_frame
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## 
## Attaching package: 'ggraph'
## The following object is masked from 'package:sp':
## 
##     geometry
## clusterProfiler v4.14.4 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## S Xu, E Hu, Y Cai, Z Xie, X Luo, L Zhan, W Tang, Q Wang, B Liu, R Wang,
## W Xie, T Wu, L Xie, G Yu. Using clusterProfiler to characterize
## multiomics data. Nature Protocols. 2024, 19(11):3292-3320
## 
## Attaching package: 'clusterProfiler'
## The following object is masked from 'package:igraph':
## 
##     simplify
## The following object is masked from 'package:IRanges':
## 
##     slice
## The following object is masked from 'package:S4Vectors':
## 
##     rename
## The following object is masked from 'package:purrr':
## 
##     simplify
## The following object is masked from 'package:stats':
## 
##     filter
## Loading required package: AnnotationDbi
## 
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:clusterProfiler':
## 
##     select
## The following object is masked from 'package:dplyr':
## 
##     select
## ReactomePA v1.50.0 Learn more at https://yulab-smu.top/contribution-knowledge-mining/
## 
## Please cite:
## 
## Guangchuang Yu, Qing-Yu He. ReactomePA: an R/Bioconductor package for
## reactome pathway analysis and visualization. Molecular BioSystems.
## 2016, 12(2):477-479
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## Reading KEGG annotation online: "https://rest.kegg.jp/link/hsa/pathway"...
## Reading KEGG annotation online: "https://rest.kegg.jp/list/pathway/hsa"...
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
comm_consesusm <- community_path(consesusm)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
comm_consesusu <- community_path(consesusu)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
comm_consesunet <- community_path(consesunet)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
community_similarity(adj_comm,list(comm_consesusm, comm_consesusu, comm_consesunet))

Early integration

count_matrices <- lapply(count_matrices, as.matrix)
early_matrix <- list(earlyj(count_matrices, rowg = T))

set.seed(1234)
time[["GENIE3_early_15Cores"]] <- system.time(
  genie3_early <- infer_networks(early_matrix, method="GENIE3", nCores = 15)
)

as.data.frame(genie3_early[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 early output")

Symmetrize and ROC

genie3_early_wadj <- generate_adjacency(genie3_early)
sgenie3_early_wadj <- symmetrize(genie3_early_wadj, weight_function = "mean")

as.data.frame(sgenie3_early_wadj[[1]]) %>%
  slice_head(n=30) %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 early symmetric weighted adjacency")
genie3_early_auc <- plotROC(sgenie3_early_wadj, adjm, plot_title = "ROC curve - GENIE3 Early Integration", is_binary = F)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgenie3_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                                      weighted_adjm_list = sgenie3_early_wadj, 
                                      n = 2,
                                      method = "GENIE3",
                                      nCores = 15)

as.data.frame(sgenie3_early_adj[[1]]) %>%
  slice_head(n=30) %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GENIE3 early symmetric adjacency")
scores.genie3.early <- pscores(adjm, sgenie3_early_adj)

scores.genie3.early$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores early")
plotg(sgenie3_early_adj)

## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]

Plot comparison

compare_consensus(consensus_matrix = sgenie3_early_adj[[1]], reference_matrix = adjm, false_plot = F)

comm_consesusm <- community_path(sgenie3_early_adj[[1]])
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
community_similarity(adj_comm,list(comm_consesusm))

GRNBoost2

Late integration

set.seed(1234)
time[["grnboost_late"]] <- system.time(
  grnboost_late <- infer_networks(count_matrices, 
                                method="GRNBoost2",
                                nCores = 15)
)
## /home/francescoc/.local/lib/python3.8/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.
## Perhaps you already have a cluster running?
## Hosting the HTTP server on port 40811 instead
##   warnings.warn(
## /home/francescoc/.local/lib/python3.8/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.
## Perhaps you already have a cluster running?
## Hosting the HTTP server on port 32833 instead
##   warnings.warn(
as.data.frame(grnboost_late[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 late output")

Symmetrize and ROC

grnboost_late_wadj <- generate_adjacency(grnboost_late)
sgrnboost_late_wadj <- symmetrize(grnboost_late_wadj, weight_function = "mean")

as.data.frame(sgrnboost_late_wadj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetric weighted adjacency")
grnboost_late_auc <- plotROC(sgrnboost_late_wadj, adjm, plot_title = "ROC curve - grnboost Late Integration")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgrnboost_late_adj <- cutoff_adjacency(count_matrices = count_matrices,
                                     weighted_adjm_list = sgrnboost_late_wadj, 
                                     n = 3,
                                     method = "GRNBoost2",
                                     nCores = 15)

as.data.frame(sgrnboost_late_adj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetric adjacency")
scores.grnboost.late.all <- pscores(adjm, sgrnboost_late_adj)

scores.grnboost.late.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores late")
plotg(sgrnboost_late_adj)

## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]

Consensus

consesusm <- create_consensus(sgrnboost_late_adj, method="vote")
consesusu <- create_consensus(sgrnboost_late_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sgrnboost_late_adj, weighted_list = sgrnboost_late_wadj, method = "INet", threshold = 0.05)
## [1] 0.3861035
## [1] 0.1119832
## [1] 0.02891771
scores.grnboost.late <- pscores(adjm, list(consesusm,consesusu,consesunet))

scores.grnboost.late$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores vote-union-iNet")

Plot comparison

compare_consensus(consensus_matrix = consesusm, reference_matrix = adjm, false_plot = F)

compare_consensus(consensus_matrix = consesusu, reference_matrix = adjm, false_plot = F)

compare_consensus(consensus_matrix = consesunet, reference_matrix = adjm, false_plot = F)

Community detection

comm_consesusm <- community_path(consesusm)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
comm_consesusu <- community_path(consesusu)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
comm_consesunet <- community_path(consesunet)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
community_similarity(adj_comm,list(comm_consesusm, comm_consesusu, comm_consesunet))

Early integration

early_matrix <- list(earlyj(count_matrices))

set.seed(1234)
time[["grnboost_early"]] <- system.time(
  grnboost_early <- infer_networks(early_matrix, method="GRNBoost2")
)

as.data.frame(grnboost_early[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 early output")

Symmetrize and ROC

grnboost_early_wadj <- generate_adjacency(grnboost_early)
sgrnboost_early_wadj <- symmetrize(grnboost_early_wadj, weight_function = "mean")

as.data.frame(sgrnboost_early_wadj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetric weighted adjacency")
grnboost_early_auc <- plotROC(sgrnboost_early_wadj, adjm, plot_title = "ROC curve - grnboost Early Integration")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

Cutoff

sgrnboost_early_adj <- cutoff_adjacency(count_matrices = early_matrix,
                                      weighted_adjm_list = sgrnboost_early_wadj, 
                                      n = 2,
                                      method = "GRNBoost2",
                                      nCores = 15)
as.data.frame(sgrnboost_early_adj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "GRNBoost2 symmetric adjacency")
scores.grnboost.early <- pscores(adjm, sgrnboost_early_adj)

scores.grnboost.early$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores early")
plotg(sgrnboost_early_adj)

## TableGrob (1 x 1) "arrange": 1 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]

Plot comparison

compare_consensus(consensus_matrix = sgrnboost_early_adj[[1]], reference_matrix = adjm, false_plot = F)

Community detection

comm_consesusm <- community_path(sgrnboost_early_adj[[1]])
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
community_similarity(adj_comm,list(comm_consesusm))

Joint Integration

Joint Random Forest

#https://cran.r-project.org/src/contrib/Archive/JRF/
#install.packages("/home/francescoc/Downloads/JRF_0.1-4.tar.gz", repos = NULL, type = "source")
set.seed(1234)
time[["JRF"]] <- system.time(
  jrf_mat <- infer_networks(count_matrices, method="JRF", nCores = 15)
)
 
as.data.frame(jrf_mat[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")

Prepare the output

jrf_list <- list()

importance_columns <- grep("importance", names(jrf_mat[[1]]), value = TRUE)

for (i in seq_along(importance_columns)) {
# Select the 'gene1', 'gene2', and the current 'importance' column
  df <- jrf_mat[[1]][, c("gene1", "gene2", importance_columns[i])]
  
  # Rename the importance column to its original name (e.g., importance1, importance2, etc.)
  names(df)[3] <- importance_columns[i]
  
  # Add the data frame to the output list
  jrf_list[[i]] <- df
}

saveRDS(jrf_list, "./../analysis/jrf.RDS")

as.data.frame(jrf_list[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF output")

symmetrize Output and ROC

jrf_wadj <- generate_adjacency(jrf_list)
sjrf_wadj <- symmetrize(jrf_wadj, weight_function = "mean")
jrf_auc_mine <- plotROC(sjrf_wadj, adjm, plot_title = "ROC curve - JRF Late Integration", is_binary = F)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases

as.data.frame(sjrf_wadj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF symmetrize output")

Generate Adjacency and Apply Cutoff

sjrf_adj <- cutoff_adjacency(count_matrices = count_matrices,
                 weighted_adjm_list = sjrf_wadj, 
                 n = 3,
                 method = "JRF")

as.data.frame(sjrf_adj[[1]]) %>%
  slice_head(n=30) %>% 
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "JRF adjacency")

Comparison with the Ground Truth

scores.jrf.all <- pscores(adjm, sjrf_adj)

scores.jrf.all$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores")
plotg(sjrf_adj)

## TableGrob (2 x 2) "arrange": 3 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
consesusm <- create_consensus(sjrf_adj, method="vote")
consesusu <- create_consensus(sjrf_adj, method="union")
consesunet <- create_consensus(adj_matrix_list = sjrf_adj, weighted_list = sjrf_wadj, method = "INet", threshold = 0.1, ncores = 15)
## [1] 0.2859879
## [1] 0.07594979
scores.jrf <- pscores(adjm, list(consesusm, consesusu, consesunet))

scores.jrf$Statistics %>%
    datatable(extensions = 'Buttons',
              options = list(
                dom = 'Bfrtip',
                buttons = c('csv', 'excel'),
                scrollX = TRUE,
                pageLength = 10), 
              caption = "scores vote-union-iNet")
compare_consensus(consensus_matrix = consesusm, reference_matrix = adjm, false_plot = F)

compare_consensus(consensus_matrix = consesusu, reference_matrix = adjm, false_plot = F)

compare_consensus(consensus_matrix = consesunet, reference_matrix = adjm, false_plot = F)

Community detection

comm_consesusm <- community_path(consesusm)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
comm_consesusu <- community_path(consesusu)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
comm_consesunet <- community_path(consesunet)
## [1] "Unweighted Network Parallel Function"
## Detected robin method type independent
## It can take time ... It depends on the size of the network.

## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
## 'select()' returned 1:1 mapping between keys and columns
community_similarity(adj_comm,list(comm_consesusm, comm_consesusu, comm_consesunet))

Method Comparison

time_data <- data.frame(
  Method = names(time),
  Time_in_Seconds = sapply(time, function(x) {
    if ("elapsed" %in% names(x)) x["elapsed"] else NA
  })
)

time_data$Time_in_Minutes <- as.numeric(time_data$Time_in_Seconds) / 60
time_data$Time_in_Hours <- as.numeric(time_data$Time_in_Seconds) / 3600

time_data <- time_data[order(time_data$Time_in_Hours), ]
time_data$Method <- factor(time_data$Method, levels = time_data$Method)

time_data <- time_data %>%
  mutate(Method_Group = case_when(
    grepl("GENIE3", Method) ~ "GENIE3",
    grepl("GRNBoost2", Method) ~ "GRNBoost2",
    #grepl("ZILGM", Method) ~ "ZILGM",
    grepl("JRF", Method) ~ "JRF"#,
    #grepl("PCzinb", Method) ~ "PCzinb"
  ))

method_colors <- c("GENIE3" = "darkblue",
                   "GRNBoost2" = "darkgreen",
                   #"ZILGM" = "orange",
                   "JRF" = "red"#,
                   #"PCzinb" = "purple"
                   )

time_plot <- ggplot(time_data, aes(x = Method, y = Time_in_Hours, fill = Method_Group)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = sprintf("%.1f min", Time_in_Minutes)), vjust = -0.5) +
  labs(title = "Execution Time for Each Method", y = "Time (Hours)", x = NULL) +
  scale_fill_manual(values = method_colors) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

print(time_plot) 

library(patchwork)
mplot <- list()

for (k in c("TPR", "FPR", "Precision", "F1", "MCC")) {
  mplot[[k]] <- data.frame(
    GENIE3_late = scores.genie3.late$Statistics[[k]][[1]],
    GENIE3_early = scores.genie3.early$Statistics[[k]],
    GRNBoost2_late = scores.grnboost.late$Statistics[[k]][[1]],
    GRNBoost2_early = scores.grnboost.early$Statistics[[k]],
    #ZILGM_late = scores.zilgm.late$Statistics[[k]],
    #ZILGM_early = scores.zilgm.early$Statistics[[k]],
    #PCzinb_late = scores.pc.late$Statistics[[k]],
    #PCzinb_early = scores.pc.early$Statistics[[k]],
    JRF = scores.jrf$Statistics[[k]][[1]]
  )
}

mplot[["AUC"]] <- data.frame(
    GENIE3_late = mean(genie3_late_auc$AUC),
    GENIE3_early = genie3_early_auc$AUC,
    GRNBoost2_late = mean(grnboost_late_auc$AUC),
    GRNBoost2_early = grnboost_early_auc$AUC,
    #ZILGM_late = zilgm_late_auc$AUC,
    #ZILGM_early = zilgm_early_auc$AUC,
    JRF = jrf_auc_mine$AUC
)

plot_data <- bind_rows(lapply(names(mplot), function(metric) {
  data.frame(
    Metric = metric,
    Method = names(mplot[[metric]]),
    Value = as.numeric(mplot[[metric]][1, ])
  )
}))

plot_data <- plot_data %>%
  mutate(Method_Group = case_when(
    grepl("GENIE3", Method) ~ "GENIE3",
    grepl("GRNBoost2", Method) ~ "GRNBoost2",
    #grepl("ZILGM", Method) ~ "ZILGM",
    grepl("JRF", Method) ~ "JRF"
  )) %>%
  mutate(Method = factor(Method, levels = c(
    "GENIE3_early", "GENIE3_late", 
    "GRNBoost2_early", "GRNBoost2_late", 
    #"ZILGM_early", "ZILGM_late",
    #"PCzinb_early", "PCzinb_late", 
    "JRF"  # Ensure JRF is last
  )))

plots <- lapply(unique(plot_data$Metric), function(metric) {
  show_x_text <- metric %in% c("Accuracy", "AUC")
  
  ggplot(plot_data %>% filter(Metric == metric), aes(x = Method, y = Value, fill = Method_Group)) +
    geom_bar(stat = "identity", position = position_dodge(width = 0.8)) +
    labs(title = metric, y = "Value", x = NULL) +
    scale_y_continuous(limits = c(0, 1)) +  
    scale_fill_manual(values = method_colors) +
    theme_minimal() +
    theme(
      axis.text.x = if (show_x_text) element_text(angle = 45, hjust = 1) else element_blank(),
      axis.title.x = element_blank(),
      legend.position = "none"  
    )
})

final_plot <- (plots[[1]] | plots[[2]]) /
              (plots[[3]] | plots[[4]]) /
              (plots[[5]] | plots[[6]]) +
  plot_layout(guides = "collect") & 
  theme(legend.position = "bottom")

print(final_plot)

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: x86_64-pc-linux-gnu
## Running under: Ubuntu 20.04.2 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0 
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=it_IT.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=it_IT.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=it_IT.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=it_IT.UTF-8 LC_IDENTIFICATION=C       
## 
## time zone: Europe/Rome
## tzcode source: system (glibc)
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] patchwork_1.3.0             ReactomePA_1.50.0          
##  [3] org.Hs.eg.db_3.20.0         AnnotationDbi_1.68.0       
##  [5] clusterProfiler_4.14.4      ggraph_2.2.1               
##  [7] robin_2.0.0                 igraph_2.1.4               
##  [9] fmsb_0.7.6                  doRNG_1.8.6.1              
## [11] rngtools_1.5.2              foreach_1.5.2              
## [13] GENIE3_1.28.0               BiocParallel_1.40.0        
## [15] scGraphVerse_0.1.0          SingleCellExperiment_1.28.1
## [17] SummarizedExperiment_1.36.0 Biobase_2.66.0             
## [19] GenomicRanges_1.58.0        GenomeInfoDb_1.42.1        
## [21] IRanges_2.40.1              S4Vectors_0.44.0           
## [23] BiocGenerics_0.52.0         MatrixGenerics_1.18.1      
## [25] matrixStats_1.5.0           STRINGdb_2.18.0            
## [27] Seurat_5.2.0                SeuratObject_5.0.2         
## [29] sp_2.1-4                    lubridate_1.9.4            
## [31] forcats_1.0.0               stringr_1.5.1              
## [33] dplyr_1.1.4                 purrr_1.0.4                
## [35] readr_2.1.5                 tidyr_1.3.1                
## [37] tibble_3.2.1                ggplot2_3.5.1              
## [39] tidyverse_2.0.0             DT_0.33                    
## [41] RColorBrewer_1.1-3         
## 
## loaded via a namespace (and not attached):
##   [1] R.methodsS3_1.8.2       gld_2.6.7               iZID_0.0.1             
##   [4] goftest_1.2-3           Biostrings_2.74.1       vctrs_0.6.5            
##   [7] ggtangle_0.0.6          spatstat.random_3.3-2   perturbR_0.1.3         
##  [10] proxy_0.4-27            digest_0.6.37           png_0.1-8              
##  [13] shape_1.4.6.1           Exact_3.3               pcaPP_2.0-5            
##  [16] ggrepel_0.9.6           bst_0.3-24              deldir_2.0-4           
##  [19] parallelly_1.41.0       hdrcde_3.4              MASS_7.3-54            
##  [22] reshape2_1.4.4          qvalue_2.38.0           httpuv_1.6.15          
##  [25] withr_3.0.2             ggfun_0.1.8             xfun_0.50              
##  [28] ggpubr_0.6.0            survival_3.2-11         memoise_2.0.1          
##  [31] gson_0.1.0              learn2count_0.3.2       tidytree_0.4.6         
##  [34] networkD3_0.4           zoo_1.8-12              gtools_3.9.5           
##  [37] pbapply_1.7-2           R.oo_1.27.0             WeightSVM_1.7-16       
##  [40] Formula_1.2-6           KEGGREST_1.46.0         promises_1.3.2         
##  [43] httr_1.4.7              rstatix_0.7.2           globals_0.16.3         
##  [46] hash_2.2.6.3            fitdistrplus_1.2-2      rstudioapi_0.17.1      
##  [49] DOSE_4.0.0              UCSC.utils_1.2.0        miniUI_0.1.1.1         
##  [52] generics_0.1.3          reactome.db_1.89.0      zlibbioc_1.52.0        
##  [55] polyclip_1.10-7         GenomeInfoDbData_1.2.13 SparseArray_1.6.1      
##  [58] xtable_1.8-4            pracma_2.4.4            doParallel_1.0.17      
##  [61] evaluate_1.0.3          JRF_0.1-4               S4Arrays_1.6.0         
##  [64] hms_1.1.3               glmnet_4.1-8            irlba_2.3.5.1          
##  [67] colorspace_2.1-1        ROCR_1.0-11             readxl_1.4.3           
##  [70] reticulate_1.40.0       spatstat.data_3.1-4     magrittr_2.0.3         
##  [73] lmtest_0.9-40           ggtree_3.14.0           later_1.4.1            
##  [76] viridis_0.6.5           lattice_0.20-44         spatstat.geom_3.3-5    
##  [79] future.apply_1.11.3     scattermore_1.2         XML_3.99-0.18          
##  [82] cowplot_1.1.3           RcppAnnoy_0.0.22        class_7.3-19           
##  [85] flux_0.3-0.1            pillar_1.10.1           nlme_3.1-152           
##  [88] iterators_1.0.14        caTools_1.18.3          compiler_4.4.2         
##  [91] RSpectra_0.16-2         stringi_1.8.4           DescTools_0.99.59      
##  [94] ZILGM_0.1.1             tensor_1.5              plyr_1.8.9             
##  [97] mpath_0.4-2.26          fda_6.2.0               crayon_1.5.3           
## [100] abind_1.4-8             gridGraphics_0.5-1      gbm_2.2.2              
## [103] chron_2.3-62            haven_2.5.4             graphlayouts_1.2.2     
## [106] bit_4.5.0.1             rootSolve_1.8.2.4       fastmatch_1.1-6        
## [109] codetools_0.2-18        crosstalk_1.2.1         bslib_0.8.0            
## [112] e1071_1.7-16            lmom_3.2                fds_1.8                
## [115] plotly_4.10.4           mime_0.12               multinet_4.2.1         
## [118] splines_4.4.2           Rcpp_1.0.14             fastDummies_1.7.5      
## [121] cellranger_1.1.0        datastructures_0.2.9    knitr_1.49             
## [124] blob_1.2.4              fs_1.6.5                listenv_0.9.1          
## [127] pscl_1.5.9              expm_1.0-0              ggplotify_0.1.2        
## [130] ggsignif_0.6.4          sqldf_0.4-11            Matrix_1.7-1           
## [133] tzdb_0.4.0              tweenr_2.0.3            pkgconfig_2.0.3        
## [136] network_1.19.0          tools_4.4.2             cachem_1.1.0           
## [139] RSQLite_2.3.9           viridisLite_0.4.2       DBI_1.2.3              
## [142] numDeriv_2016.8-1.1     graphite_1.52.0         distributions3_0.2.2   
## [145] fastmap_1.2.0           rmarkdown_2.29          scales_1.3.0           
## [148] grid_4.4.2              ica_1.0-3               broom_1.0.7            
## [151] sass_0.4.9              INetTool_0.1.0          coda_0.19-4.1          
## [154] dotCall64_1.2           graph_1.84.1            carData_3.0-5          
## [157] RANN_2.6.2              rpart_4.1-15            farver_2.1.2           
## [160] tidygraph_1.3.1         gsubfn_0.7              yaml_2.3.10            
## [163] deSolve_1.40            cli_3.6.3               lifecycle_1.0.4        
## [166] askpass_1.2.1           uwot_0.2.2              rainbow_3.8            
## [169] mvtnorm_1.3-3           backports_1.5.0         timechange_0.3.0       
## [172] gtable_0.3.6            ggridges_0.5.6          progressr_0.15.1       
## [175] ape_5.8-1               parallel_4.4.2          pROC_1.18.5            
## [178] jsonlite_1.8.9          RcppHNSW_0.6.0          bitops_1.0-9           
## [181] bit64_4.6.0-1           Rtsne_0.17              yulab.utils_0.2.0      
## [184] spatstat.utils_3.1-2    proto_1.0.0             jquerylib_0.1.4        
## [187] GOSemSim_2.32.0         R.utils_2.12.3          spatstat.univar_3.1-1  
## [190] lazyeval_0.2.2          shiny_1.10.0            enrichplot_1.26.6      
## [193] htmltools_0.5.8.1       GO.db_3.20.0            sctransform_0.4.1      
## [196] rappdirs_0.3.3          glue_1.8.0              spam_2.11-1            
## [199] XVector_0.46.0          RCurl_1.98-1.16         qpdf_1.3.4             
## [202] treeio_1.30.0           mclust_6.1.1            ks_1.14.3              
## [205] gridExtra_2.3           boot_1.3-28             R6_2.5.1               
## [208] fdatest_2.1.1           gplots_3.2.0            labeling_0.4.3         
## [211] cluster_2.1.2           aplot_0.2.4             statnet.common_4.11.0  
## [214] DelayedArray_0.32.0     tidyselect_1.2.1        plotrix_3.8-4          
## [217] ggforce_0.4.2           car_3.1-3               future_1.34.0          
## [220] munsell_0.5.1           KernSmooth_2.23-20      data.table_1.16.4      
## [223] fgsea_1.32.2            htmlwidgets_1.6.4       rlang_1.1.5            
## [226] spatstat.sparse_3.1-0   spatstat.explore_3.3-4  rentrez_1.2.3